Neural Network for forecasting

Package

library(tidyverse)
library(readxl)
library(timetk)
library(skimr)
library(datawizard)
library(neuralnet)
library(yardstick)

Import Data

df <- read_excel("Data_PDRB_siap2.xlsx",sheet = 2)
glimpse(df)
Rows: 36
Columns: 9
$ date              <chr> "2015-03-01", "2015-06-01", "2015-09-01", "2015-12-0…
$ ADHK              <dbl> 20908.25, 21772.33, 24374.33, 22283.07, 22642.59, 23…
$ prod_tbg          <dbl> 181240.61, 192835.54, 240285.11, 191611.46, 202663.4…
$ kredit_konstruksi <dbl> 4.272316e+11, 4.523844e+11, 4.848373e+11, 6.085998e+…
$ ekspor            <dbl> 252144503, 376672632, 571238429, 279389810, 37400675…
$ pmi_jepang        <dbl> 51.36667, 50.30000, 51.30000, 52.53333, 50.50000, 48…
$ ihk               <dbl> 98.85533, 98.85300, 98.84140, 98.83007, 98.81210, 98…
$ soi               <dbl> -4.666667, -9.366667, -15.500000, -16.533333, -11.70…
$ ikk               <dbl> 123.73333, 117.86667, 114.16667, 111.03333, 121.5833…
df <- mutate(df,date = ymd(date))
glimpse(df)
Rows: 36
Columns: 9
$ date              <date> 2015-03-01, 2015-06-01, 2015-09-01, 2015-12-01, 201…
$ ADHK              <dbl> 20908.25, 21772.33, 24374.33, 22283.07, 22642.59, 23…
$ prod_tbg          <dbl> 181240.61, 192835.54, 240285.11, 191611.46, 202663.4…
$ kredit_konstruksi <dbl> 4.272316e+11, 4.523844e+11, 4.848373e+11, 6.085998e+…
$ ekspor            <dbl> 252144503, 376672632, 571238429, 279389810, 37400675…
$ pmi_jepang        <dbl> 51.36667, 50.30000, 51.30000, 52.53333, 50.50000, 48…
$ ihk               <dbl> 98.85533, 98.85300, 98.84140, 98.83007, 98.81210, 98…
$ soi               <dbl> -4.666667, -9.366667, -15.500000, -16.533333, -11.70…
$ ikk               <dbl> 123.73333, 117.86667, 114.16667, 111.03333, 121.5833…

Grafik Time Series

Fungsi pembantu

Code
multi_plot_time_series <- function(data,date,exclude_var=NULL,.interactive=TRUE,n_col=2,n_row=2,.title="Multiple Time Series"){
data %>% 
  select(-all_of(exclude_var)) %>% 
  select(all_of(date),where(is.numeric)) %>% 
  pivot_longer(cols = -all_of(date),
               names_to = "variable",
               values_to = "value") %>% 
  group_by(variable) %>% 
    plot_time_series(.data=,
                 .date_var = date,
                 .value = value,
                 .interactive = .interactive,
                 .title =  .title,
                 .facet_ncol = n_col,
                 .facet_nrow = n_row,
                 .smooth = FALSE)
}  

Single Time Series

plot_time_series(.data=df,
                 .date_var = date,
                 .value = ADHK,
                 .interactive = TRUE,
                 .title =  "ADHK",
                 .smooth = FALSE)

Multiple Plot

multi_plot_time_series(df,
                       date = "date",
                       exclude_var = "ADHK",
                       .interactive=FALSE,
                       n_col = 2)

Multi Input Multi Output (MIMO)

Fungsi pembantu

Code
prepare_mimo_data <- function(data,
                              date_col,
                              input_vars,
                              output_vars,
                              lags       = 1:12,
                              horizon    = 1,
                              remove_na  = TRUE) {
  
  # Tidyeval the date column
  date_col <- rlang::enquo(date_col)
  
  # 1) Order by time index
  df_prep <- data %>%
    dplyr::arrange(!!date_col)
  
  # 2) Generate lagged inputs via timetk
  #    Creates columns like: sales_lag1, sales_lag2, ..., price_lag1, ...
  df_prep <- df_prep %>%
    timetk::tk_augment_lags(
      .value = all_of(input_vars),
      .lags  = lags
    )
  # 3) Generate future targets via dplyr::lead()
  #    Creates columns like: sales_h1, sales_h2, ...
  df_prep <- df_prep %>%
    timetk::tk_augment_leads(
      .value = all_of(output_vars),
      .lags  = -horizon
    )
  
      # Build vector of all generated column names
    lag_cols    <- df_prep %>% select(contains("lag")) %>% names()
    lead_cols   <- df_prep %>% select(contains("lead")) %>% names()
    all_new_cols <- c(sort(lag_cols,decreasing = TRUE), lead_cols)
  # 4) Optionally drop rows with NAs in any of the new columns
  if (remove_na) {
    
    df_prep <- df_prep %>%
      tidyr::drop_na(dplyr::all_of(all_new_cols))
  }
  
  # Return the prepared tibble
  df_prep <- df_prep %>% 
              dplyr::select(!!date_col,
                     dplyr::all_of(all_new_cols)) %>% 
              dplyr::rename("date_lg0"=!!date_col)
  #nm_df_prep <- df_prep %>% select(-!!date_col) %>% names()
  #date_nm <- df_prep %>% select(!!date_col) %>% names()
  #names(df_prep) <- c(date_nm,sort(nm_df_prep,decreasing = FALSE))
  return(df_prep)
}

Struktur Data MIMO

prepare_mimo_data(data = df,
                  date_col = date,
                  input_vars = c("ADHK"),
                  output_vars = c("ADHK"),
                  lags = 0:2,
                  remove_na = FALSE,
                  horizon = 1:4)

Time Series Nerual Network

Fungsi pembantu

Code
train_mlp_mimo <- function(formula = formula,data=data,hidden=hidden,activation="linear",...){
data0 <- data
data1 <- data %>% drop_na()
data <- standardize(data1,select = is.numeric,robust = TRUE)
data1 <- standardize(data0,select = is.numeric,robust = TRUE,reference=data1)

activation =  switch(
        activation,
        tanh   = function(x) tanh(x),
        linear = function(x) x,
        logistic ="logistic"
      )
model <- neuralnet(formula = formula,
                   data = data,hidden = hidden, act.fct = activation,...)
model$data0 <- data0
model$data1 <- data1
return(model)
}
Code
forecast_mlp <- function(model,real_out_cols){
df_dt <- model$data0 %>% 
          select(where(is.Date)) %>% 
          pull()
date_smry <- tk_get_timeseries_summary(df_dt)
future_date <- tk_make_timeseries(df_dt %>% tail(1),
                                  length_out = 1+ncol(model$response),
                                  by = date_smry$scale)
future_date <- future_date[-1]

forecast <- predict(model,newdata = model$data1 %>% slice_tail(n=1)) %>% 
  as.data.frame()
names(forecast) <- colnames(model$response)
forecast <- unstandardize(forecast,robust = TRUE,
                          reference = drop_na(model$data0)
                          ) %>% 
            as.numeric()
past_data <- model$data0 %>% 
              select(where(is.Date),all_of(real_out_cols)) %>% 
              mutate(type="actual") %>% 
              magrittr::set_names(c("date","value","type"))
forecast <- data.frame(date=future_date,value=forecast,type="forecast")
forecast <- bind_rows(past_data,forecast)
return(forecast)
}
Code
forecast_mlp_plot <- function(result,test_data){
result %>% 
  bind_rows(test_data %>% 
            mutate(type="actual") %>% 
              magrittr::set_names(c("date","value","type"))
              ) %>% 
  plot_time_series(.date_var = date,
                 .value = value,
                 .interactive = TRUE,
                 .title =  "Forecast Plot",
                 .color_var = type,
                 .legend_show = FALSE,
                 .smooth = FALSE)
}

Single Input Single Output

Pembagian data

train_df1 <-  df %>% 
            filter(date<="2023-09-01")
train_df1$date
 [1] "2015-03-01" "2015-06-01" "2015-09-01" "2015-12-01" "2016-03-01"
 [6] "2016-06-01" "2016-09-01" "2016-12-01" "2017-03-01" "2017-06-01"
[11] "2017-09-01" "2017-12-01" "2018-03-01" "2018-06-01" "2018-09-01"
[16] "2018-12-01" "2019-03-01" "2019-06-01" "2019-09-01" "2019-12-01"
[21] "2020-03-01" "2020-06-01" "2020-09-01" "2020-12-01" "2021-03-01"
[26] "2021-06-01" "2021-09-01" "2021-12-01" "2022-03-01" "2022-06-01"
[31] "2022-09-01" "2022-12-01" "2023-03-01" "2023-06-01" "2023-09-01"
train_df01 <-  df %>% 
            filter(date<="2023-09-01")
train_df01$date
 [1] "2015-03-01" "2015-06-01" "2015-09-01" "2015-12-01" "2016-03-01"
 [6] "2016-06-01" "2016-09-01" "2016-12-01" "2017-03-01" "2017-06-01"
[11] "2017-09-01" "2017-12-01" "2018-03-01" "2018-06-01" "2018-09-01"
[16] "2018-12-01" "2019-03-01" "2019-06-01" "2019-09-01" "2019-12-01"
[21] "2020-03-01" "2020-06-01" "2020-09-01" "2020-12-01" "2021-03-01"
[26] "2021-06-01" "2021-09-01" "2021-12-01" "2022-03-01" "2022-06-01"
[31] "2022-09-01" "2022-12-01" "2023-03-01" "2023-06-01" "2023-09-01"
test_df1 <-  df %>% 
            select(date,ADHK) %>% 
            filter(date>"2023-09-01")
test_df1$date
[1] "2023-12-01"

Reshaping MIMO Data

train_df1 <- prepare_mimo_data(data = train_df1,
                  date_col = date,
                  input_vars = c("ADHK"),
                  output_vars = c("ADHK"),
                  lags = 0:4,
                  remove_na = FALSE,
                  horizon = 1)
train_df1
train_df01 <- prepare_mimo_data(data = train_df01,
                  date_col = date,
                  input_vars = c("ADHK"),
                  output_vars = c("ADHK"),
                  lags = 0:4,
                  remove_na = FALSE,
                  horizon = 1)
train_df01
test_df1

Modeling

set.seed(2045)
mod1 <- train_mlp_mimo(ADHK_lead1 ~ ADHK_lag0,data=train_df1,
                      hidden = c(5),
                      activation = "linear")
plot(mod1)
set.seed(2045)
mod01 <- train_mlp_mimo(ADHK_lead1 ~ ADHK_lag0 + ADHK_lag1 + ADHK_lag2 + ADHK_lag3 + ADHK_lag4,data=train_df01,
                      hidden = c(5),
                      activation = "linear")
res1 <- forecast_mlp(model = mod1,real_out_cols="ADHK_lag0")
res1 %>% 
  filter(type=="forecast")
res01 <- forecast_mlp(model = mod01,real_out_cols="ADHK_lag0")
res01 %>% 
  filter(type=="forecast")
test_df1
rmse_vec(truth = test_df1$ADHK,
         estimate = filter(res1,type=="forecast") %>% pull(value))
[1] 1362.644
mape_vec(truth = test_df1$ADHK,
         estimate = filter(res1,type=="forecast") %>% pull(value))
[1] 5.072655
rmse_vec(truth = test_df1$ADHK,
         estimate = filter(res01,type=="forecast") %>% pull(value))
[1] 561.7437
mape_vec(truth = test_df1$ADHK,
         estimate = filter(res01,type=="forecast") %>% pull(value))
[1] 2.091179
forecast_mlp_plot(result = res01,test_data = test_df1)

Single Input Multi Output

Pembagian data

train_df2 <-  df %>% 
            filter(date<="2023-06-01")
train_df2$date
 [1] "2015-03-01" "2015-06-01" "2015-09-01" "2015-12-01" "2016-03-01"
 [6] "2016-06-01" "2016-09-01" "2016-12-01" "2017-03-01" "2017-06-01"
[11] "2017-09-01" "2017-12-01" "2018-03-01" "2018-06-01" "2018-09-01"
[16] "2018-12-01" "2019-03-01" "2019-06-01" "2019-09-01" "2019-12-01"
[21] "2020-03-01" "2020-06-01" "2020-09-01" "2020-12-01" "2021-03-01"
[26] "2021-06-01" "2021-09-01" "2021-12-01" "2022-03-01" "2022-06-01"
[31] "2022-09-01" "2022-12-01" "2023-03-01" "2023-06-01"
test_df2 <-  df %>% 
            select(date,ADHK) %>% 
            filter(date>"2023-06-01")
test_df2$date
[1] "2023-09-01" "2023-12-01"

Reshaping MIMO Data

train_df2 <- prepare_mimo_data(data = train_df2,
                  date_col = date,
                  input_vars = c("ADHK"),
                  output_vars = c("ADHK"),
                  lags = 0,
                  remove_na = FALSE,
                  horizon = 1:2)
train_df2
test_df2

Modeling

set.seed(2045)
mod2 <- train_mlp_mimo(ADHK_lead1 + ADHK_lead2 ~ ADHK_lag0,data=train_df2,
                      hidden = c(5),
                      activation = "linear")
plot(mod2)
res2 <- forecast_mlp(model = mod2,real_out_cols="ADHK_lag0")
res2 %>% 
  filter(type=="forecast")
test_df2
rmse_vec(truth = test_df2$ADHK,
         estimate = filter(res2,type=="forecast") %>% pull(value))
[1] 2476.009
mape_vec(truth = test_df2$ADHK,
         estimate = filter(res2,type=="forecast") %>% pull(value))
[1] 9.128468
forecast_mlp_plot(result = res2,test_data = test_df2)

Multi Input Multi Output

Pembagian data

train_df3 <-  df %>% 
            filter(date<="2023-06-01")
train_df3$date
 [1] "2015-03-01" "2015-06-01" "2015-09-01" "2015-12-01" "2016-03-01"
 [6] "2016-06-01" "2016-09-01" "2016-12-01" "2017-03-01" "2017-06-01"
[11] "2017-09-01" "2017-12-01" "2018-03-01" "2018-06-01" "2018-09-01"
[16] "2018-12-01" "2019-03-01" "2019-06-01" "2019-09-01" "2019-12-01"
[21] "2020-03-01" "2020-06-01" "2020-09-01" "2020-12-01" "2021-03-01"
[26] "2021-06-01" "2021-09-01" "2021-12-01" "2022-03-01" "2022-06-01"
[31] "2022-09-01" "2022-12-01" "2023-03-01" "2023-06-01"
test_df3 <-  df %>% 
            select(date,ADHK) %>% 
            filter(date>"2023-06-01")
test_df3$date
[1] "2023-09-01" "2023-12-01"

Reshaping MIMO Data

train_df3 <- prepare_mimo_data(data = train_df3,
                  date_col = date,
                  input_vars = c("ADHK"),
                  output_vars = c("ADHK"),
                  lags = 0:1,
                  remove_na = FALSE,
                  horizon = 1:2)
train_df3
test_df3

Modeling

set.seed(2045)
mod3 <- train_mlp_mimo(ADHK_lead1 + ADHK_lead2 ~ ADHK_lag0 + ADHK_lag1,
                       data=train_df3,
                      hidden = c(10),
                      activation = "linear")
plot(mod3)
res3 <- forecast_mlp(model = mod3,real_out_cols="ADHK_lag0")
res3 %>% 
  filter(type=="forecast")
test_df3
rmse_vec(truth = test_df3$ADHK,
         estimate = filter(res3,type=="forecast") %>% pull(value))
[1] 2331.135
mape_vec(truth = test_df3$ADHK,
         estimate = filter(res3,type=="forecast") %>% pull(value))
[1] 8.729147
forecast_mlp_plot(result = res3,test_data = test_df3)